Descriptive Statistics

  1. Descriptive Statistics and Graphs
  2. Number of Tweets (Total)
  3. Number of Tweets (Time Series)
  4. Gender Distribution
  5. Language Distribution
  6. Follower Counts
  7. Client Usage (Android, iPhone, web etc.)

Jupyter Notebook Style

Let's make this thing look nice.


In [1]:
from IPython.core.display import HTML
styles = open("../css/custom.css", "r").read()
HTML(styles)


Out[1]:

In [2]:
import pandas as pd
import numpy as np
import cPickle as pickle
import json

Read Pickle File with Tweets

Path to the pickle file where previously fetched tweets are.


In [12]:
jsonpath = '/Users/rcn/Desktop/twitter-analysis/data/raw/tweets.json'

In [21]:
tweets = pd.read_json(jsonpath, orient='records')

In [20]:
tweets = pd.io.json.json_normalize(jsonpath)


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-20-3af8a8c2f76d> in <module>()
----> 1 tweets = pd.io.json.json_normalize(jsonpath)

/Users/rcn/anaconda/lib/python2.7/site-packages/pandas/io/json.pyc in json_normalize(data, record_path, meta, meta_prefix, record_prefix)
    705 
    706     if record_path is None:
--> 707         if any([isinstance(x, dict) for x in compat.itervalues(data[0])]):
    708             # naive normalization, this is idempotent for flat records
    709             # and potentially will inflate the data considerably for

/Users/rcn/anaconda/lib/python2.7/site-packages/pandas/compat/__init__.pyc in itervalues(obj, **kwargs)
    146     func = getattr(obj, "itervalues", None)
    147     if not func:
--> 148         func = obj.values
    149     return func(**kwargs)
    150 

AttributeError: 'str' object has no attribute 'values'

Number of Tweets


In [15]:
print('We have %d tweets in total' % len(tweets))


We have 6727 tweets in total

In [22]:
twitterData = pd.DataFrame(tweets)

In [23]:
twitterData.head()


Out[23]:
contributors coordinates created_at entities extended_entities favorite_count favorited geo id id_str ... quoted_status quoted_status_id quoted_status_id_str retweet_count retweeted retweeted_status source text truncated user
0 NaN None 2016-02-04 11:24:31 {u'symbols': [], u'user_mentions': [], u'hasht... {u'media': [{u'expanded_url': u'http://twitter... 19 False None 695206295205359616 695206295205359616 ... NaN NaN NaN 16 False NaN <a href="http://twitter.com" rel="nofollow">Tw... 14-year-old Praveen dreams of protecting his c... False {u'id': 66935683, u'id_str': u'66935683'}
1 NaN None 2016-02-03 09:04:13 {u'symbols': [], u'user_mentions': [{u'indices... {u'media': [{u'source_user_id': 2831687275, u'... 0 False None 694808599592185857 694808599592185856 ... NaN NaN NaN 5 False {u'contributors': None, u'truncated': False, u... <a href="http://twitter.com" rel="nofollow">Tw... RT @WaterAidIndia: @UNICEFIndia's @MamitaBora ... False {u'id': 66935683, u'id_str': u'66935683'}
2 NaN None 2016-02-03 05:48:45 {u'symbols': [], u'user_mentions': [], u'hasht... {u'media': [{u'expanded_url': u'http://twitter... 27 False None 694759407310090241 694759407310090240 ... NaN NaN NaN 23 False NaN <a href="http://twitter.com" rel="nofollow">Tw... Saloni is 17. She believes that women's safety... False {u'id': 66935683, u'id_str': u'66935683'}
3 NaN None 2016-02-02 14:05:21 {u'symbols': [], u'user_mentions': [], u'hasht... {u'media': [{u'expanded_url': u'http://twitter... 6 False None 694521994956578816 694521994956578816 ... NaN NaN NaN 5 False NaN <a href="http://twitter.com" rel="nofollow">Tw... "Hello! How are you?", says Pawan Vishwakarma... False {u'id': 66935683, u'id_str': u'66935683'}
4 NaN None 2016-01-31 05:19:40 {u'symbols': [], u'user_mentions': [], u'hasht... {u'media': [{u'expanded_url': u'http://twitter... 24 False None 693664923159629824 693664923159629824 ... NaN NaN NaN 24 False NaN <a href="http://twitter.com" rel="nofollow">Tw... 3 Youth, Children used whistles to create awar... False {u'id': 66935683, u'id_str': u'66935683'}

5 rows × 29 columns


In [24]:
text =


Out[24]:
entities
0 {u'symbols': [], u'user_mentions': [], u'hasht...
1 {u'symbols': [], u'user_mentions': [{u'indices...
2 {u'symbols': [], u'user_mentions': [], u'hasht...
3 {u'symbols': [], u'user_mentions': [], u'hasht...
4 {u'symbols': [], u'user_mentions': [], u'hasht...
5 {u'symbols': [], u'user_mentions': [], u'hasht...
6 {u'symbols': [], u'user_mentions': [{u'indices...
7 {u'symbols': [], u'user_mentions': [], u'hasht...
8 {u'symbols': [], u'user_mentions': [], u'hasht...
9 {u'symbols': [], u'user_mentions': [], u'hasht...
10 {u'symbols': [], u'user_mentions': [], u'hasht...
11 {u'symbols': [], u'user_mentions': [{u'indices...
12 {u'symbols': [], u'user_mentions': [], u'hasht...
13 {u'symbols': [], u'user_mentions': [{u'indices...
14 {u'symbols': [], u'user_mentions': [{u'indices...
15 {u'symbols': [], u'user_mentions': [{u'indices...
16 {u'symbols': [], u'user_mentions': [], u'hasht...
17 {u'symbols': [], u'user_mentions': [], u'hasht...
18 {u'symbols': [], u'user_mentions': [], u'hasht...
19 {u'symbols': [], u'user_mentions': [{u'indices...
20 {u'symbols': [], u'user_mentions': [{u'indices...
21 {u'symbols': [], u'user_mentions': [{u'indices...
22 {u'symbols': [], u'user_mentions': [{u'indices...
23 {u'symbols': [], u'user_mentions': [{u'indices...
24 {u'symbols': [], u'user_mentions': [{u'indices...
25 {u'symbols': [], u'user_mentions': [{u'indices...
26 {u'symbols': [], u'user_mentions': [{u'indices...
27 {u'symbols': [], u'user_mentions': [{u'indices...
28 {u'symbols': [], u'user_mentions': [{u'indices...
29 {u'symbols': [], u'user_mentions': [{u'indices...
... ...
6697 {u'symbols': [], u'user_mentions': [], u'hasht...
6698 {u'symbols': [], u'user_mentions': [], u'hasht...
6699 {u'symbols': [], u'user_mentions': [{u'indices...
6700 {u'symbols': [], u'user_mentions': [], u'hasht...
6701 {u'symbols': [], u'user_mentions': [], u'hasht...
6702 {u'symbols': [], u'user_mentions': [], u'hasht...
6703 {u'symbols': [], u'user_mentions': [], u'hasht...
6704 {u'symbols': [], u'user_mentions': [{u'indices...
6705 {u'symbols': [], u'user_mentions': [{u'indices...
6706 {u'symbols': [], u'user_mentions': [{u'indices...
6707 {u'symbols': [], u'user_mentions': [{u'indices...
6708 {u'symbols': [], u'user_mentions': [{u'indices...
6709 {u'symbols': [], u'user_mentions': [], u'hasht...
6710 {u'symbols': [], u'user_mentions': [{u'indices...
6711 {u'symbols': [], u'user_mentions': [{u'indices...
6712 {u'symbols': [], u'user_mentions': [], u'hasht...
6713 {u'symbols': [], u'user_mentions': [{u'indices...
6714 {u'symbols': [], u'user_mentions': [{u'indices...
6715 {u'symbols': [], u'user_mentions': [{u'indices...
6716 {u'symbols': [], u'user_mentions': [{u'indices...
6717 {u'symbols': [], u'user_mentions': [], u'hasht...
6718 {u'symbols': [], u'user_mentions': [], u'hasht...
6719 {u'symbols': [], u'user_mentions': [{u'indices...
6720 {u'symbols': [], u'user_mentions': [{u'indices...
6721 {u'symbols': [], u'user_mentions': [{u'indices...
6722 {u'symbols': [], u'user_mentions': [], u'hasht...
6723 {u'symbols': [], u'user_mentions': [], u'hasht...
6724 {u'symbols': [], u'user_mentions': [], u'hasht...
6725 {u'symbols': [], u'user_mentions': [], u'hasht...
6726 {u'symbols': [], u'user_mentions': [], u'hasht...

6727 rows × 1 columns


In [18]:
twitterData.dtypes


Out[18]:
contributors                        float64
coordinates                          object
created_at                   datetime64[ns]
entities                             object
extended_entities                    object
favorite_count                        int64
favorited                              bool
geo                                  object
id                                    int64
id_str                                int64
in_reply_to_screen_name              object
in_reply_to_status_id               float64
in_reply_to_status_id_str           float64
in_reply_to_user_id                 float64
in_reply_to_user_id_str             float64
is_quote_status                        bool
lang                                 object
place                                object
possibly_sensitive                  float64
quoted_status                        object
quoted_status_id                    float64
quoted_status_id_str                float64
retweet_count                         int64
retweeted                              bool
retweeted_status                     object
source                               object
text                                 object
truncated                              bool
user                                 object
dtype: object

In [10]:
#twitterData.twitter_mentions=
#twitterData[30:45].twitter_mentions.str.split(',').astype(list).astype('str')
#twitterData.twitter_mentions_list=twitterData.twitter_mentions.str.split(',').astype(list).astype('str')
#twitterData.twitter_mentions_list=twitterData.twitter_mentions.apply(lambda x: list(str(x).split(',')))
#twitterData.twitter_mentions_list[40:45].get_values()
twitterData.entities.user_mentions[40:45].get_values()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-10-96fa9155f834> in <module>()
      4 #twitterData.twitter_mentions_list=twitterData.twitter_mentions.apply(lambda x: list(str(x).split(',')))
      5 #twitterData.twitter_mentions_list[40:45].get_values()
----> 6 twitterData.entities.user_mentions[40:45].get_values()

/Users/rcn/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in __getattr__(self, name)
   2358                 return self[name]
   2359             raise AttributeError("'%s' object has no attribute '%s'" %
-> 2360                                  (type(self).__name__, name))
   2361 
   2362     def __setattr__(self, name, value):

AttributeError: 'Series' object has no attribute 'user_mentions'

In [10]:
pd.Series.get_values


Out[10]:
<unbound method Series.get_values>

In [8]:
(twitterData.twitter_mentions_list[44:45]).get_values()[0][0]


Out[8]:
'nan'

In [9]:
twitterData.describe()


Out[9]:
gender_prob followers friends Campaign Discrimination Prevention Testing links_domain
count 1024.000000 1739.000000 1739.000000 1739.000000 1739.000000 1739.000000 1739.000000 0
mean 0.964005 1582.530190 815.294997 0.002300 0.869465 0.125934 0.002300 NaN
std 0.105735 6661.569022 2784.618818 0.047919 0.336988 0.331871 0.047919 NaN
min 0.500000 1.000000 5.000000 0.000000 0.000000 0.000000 0.000000 NaN
25% 0.993567 280.500000 185.000000 0.000000 1.000000 0.000000 0.000000 NaN
50% 0.997935 541.000000 332.000000 0.000000 1.000000 0.000000 0.000000 NaN
75% 1.000000 1071.000000 665.500000 0.000000 1.000000 0.000000 0.000000 NaN
max 1.000000 178935.000000 83003.000000 1.000000 1.000000 1.000000 1.000000 NaN

All Tweets

Number of Tweets


In [10]:
nTweets = len(twitterData.index)
print "There are", nTweets, "tweets in the full dataset"


There are 1740 tweets in the full dataset

Number of Tweets Over Time


In [11]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [9]:
#twitterData['Friends'].plot()

In [12]:
from bokeh.plotting import *
output_notebook()


BokehJS successfully loaded.

In [13]:
tweets = twitterData.id.count()
tweets


Out[13]:
1740

In [14]:
from ggplot import *
%matplotlib inline

In [18]:
figure(
    title='Number of Tweets',        # Plot title
    title_text_font='Courier New',  # Title font
    title_text_color='#5d6263',     # Title font colour
    plot_width=1000,                # Plot width
    plot_height=600,                # Plot height
    background_fill='#f6f6f6',      # Background colour
    border_fill='#f6f6f6',          # Border background
    border_symmetry='hv',           # h=horizontal, v=vertical
    outline_line_color='#f6f6f6',   # Plot area border colour
    x_axis_type = 'datetime',       # For timeseries only
    tools='pan,box_zoom,previewsave,resize,select,reset' # Available: pan,wheel_zoom,box_zoom,previewsave,resize,select,reset
)
hold()
line(
    twitterData.time,             # x
    twitterData.id,                 # y
    color='#00aeef',                # Line colour
    line_width=3,                   # Line width in px
    legend='Tweets',                 # Legend label
)
legend().label_text_font='Courier New'
legend().label_text_color='#5d6263'
legend().label_outline_line_color='#f6f6f6'
yaxis().axis_line_color = None
xaxis().axis_line_color = '#d4d4d4'
axis().major_label_text_font="Courier New"
axis().major_label_text_font_size="12pt"
xgrid().grid_line_color = None
ygrid().grid_line_color = "#d4d4d4"
ygrid().grid_line_width = 1
show()



In [22]:
output_file("friends.html", title="timeseries example")
hold()
line(
    twitterData['time'],                                       # x coordinates
    twitterData['friends'],                                  # y coordinates
    color='#A6CEE3',                                    # set a color for the line
    legend='Friends',                                      # attach a legend label
    x_axis_type = "datetime",                           # NOTE: only needed on first
    tools="pan,wheel_zoom,box_zoom,reset,previewsave"   # NOTE: only needed on first
)


Out[22]:
<bokeh.objects.Plot at 0x7fb138f93810>

In [23]:
x = twitterData['time']
y = twitterData.cumsum()
line(x,y, color="#0000FF", tools=[])
show()


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-23-40910ed1e536> in <module>()
      1 x = twitterData['time']
----> 2 y = twitterData.cumsum()
      3 line(x,y, color="#0000FF", tools=[])
      4 show()

/home/ubuntu/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in func(self, axis, dtype, out, skipna, **kwargs)
   3921                     if skipna:
   3922                         np.putmask(y, mask, mask_a)
-> 3923                     result = accum_func(y, axis)
   3924                     if skipna:
   3925                         np.putmask(result, mask, mask_b)

/home/ubuntu/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in <lambda>(y, axis)
   3938             np.inf, np.nan)
   3939         cls.cumsum = _make_cum_function(
-> 3940             'sum', lambda y, axis: y.cumsum(axis), 0., np.nan)
   3941         cls.cumprod = _make_cum_function(
   3942             'prod', lambda y, axis: y.cumprod(axis), 1., np.nan)

TypeError: coercing to Unicode: need string or buffer, float found

Documents by Data Source


In [29]:
# Languages included in taxonomy: en, hi, ur, sw, ha, ig, yo
nDataSiftType = twitterData.type.value_counts(normalize=True, sort=True, ascending=False, bins=None)
nDataSiftType[0:10]


Out[29]:
twitter    0.999425
dtype: float64

Documents by DataSift Language


In [25]:
# Languages included in taxonomy: en, hi, ur, sw, ha, ig, yo
nDataSiftLanguage = twitterData.datasift_lang.value_counts(normalize=True, sort=True, ascending=False, bins=None)
nDataSiftLanguage[0:10]


Out[25]:
pt    0.822414
en    0.041379
la    0.033333
ts    0.005747
fr    0.002874
cs    0.002874
zu    0.002874
rm    0.002874
da    0.002874
gl    0.002299
dtype: float64

Documents by Twitter Language


In [26]:
nTwitterLanguage = twitterData.twitter_lang.value_counts(normalize=True, sort=True, ascending=False, bins=None)
nTwitterLanguage[0:10]


Out[26]:
pt     0.934483
es     0.019540
in     0.012644
et     0.006897
it     0.005747
fr     0.005172
en     0.005172
und    0.002299
sl     0.001724
hr     0.001724
dtype: float64

Number of Documents by Twitter Location


In [27]:
# Reminder: Locations we are interested in are "IN", "PK", "NG", and "KE".
nLocation = twitterData.twitter_location.value_counts(normalize=False, sort=True, ascending=False, bins=None)
nLocation[0:15]


Out[27]:
Brasil                          38
Rio de Janeiro                  36
São Paulo                       20
Brazil                          12
Setor 2814                      10
Porto Alegre                     8
SP                               7
Rio Grande do Sul                6
1d/ gays sim, flopados nunca     6
011                              6
Brasília                         6
Brazil                           5
Rj                               5
Maceió-AL                        5
São Paulo                        5
dtype: int64

Number of Documents by UNGP Location


In [28]:
# Reminder: Locations we are interested in are "IN", "PK", "NG", and "KE".
nUngpLocation = twitterData.UNGP_location.value_counts(normalize=False, sort=True, ascending=False, bins=None)
nUngpLocation[0:15]


Out[28]:
BR    718
US     41
FR     10
GB      8
AU      8
ES      5
MX      5
BH      4
IT      4
IN      3
GR      3
BF      3
PE      3
PG      3
CO      2
dtype: int64

In [37]:
# Getting Vincent ready 
vincent.initialize_notebook()
gpBlue='#00aeef'
gpLightGray='#96999b'
gpDarkBlue='#00447c'
gpRed='#cf5c42'
gpBrown='#e1d8ad'
gpPink='#f4d5e3'
gpLightBlue='#e1f4fd'



In [39]:
location_grouped = twitterData.groupby('UNGPLocation')
mean_location_grouped = location_grouped.mean().dropna()
mean_followers = mean_location_grouped.sort('Followers')['Followers']
followersBar = vincent.Bar(mean_followers)
followersBar.axis_titles(x='Country', y='Followers')
from vincent.axes import AxisProperties
from vincent.properties import PropertySet
from vincent.values import ValueRef
for axis in followersBar.axes:
    axis.properties = AxisProperties()
    for prop in ['ticks', 'axis', 'major_ticks', 'minor_ticks']:
        setattr(axis.properties, prop, PropertySet(stroke=ValueRef(value=gpLightGray)))
    axis.properties.title = PropertySet(font_size=ValueRef(value=20), 
                                        fill=ValueRef(value=gpLightGray))
    axis.properties.labels = PropertySet(fill=ValueRef(value=gpLightGray))
followersBar.axes[0].properties.labels.angle = ValueRef(value=0)
followersBar.axes[0].properties.labels.align = ValueRef(value='center')
followersBar.axes[0].properties.title.dy = ValueRef(value=20)
followersBar.scales[2].range = [gpBlue]
followersBar.to_json('../charts/followersBar.json')
followersBar


Out[39]:

In [40]:
location_grouped = twitterData.groupby('UNGPLocation')
mean_location_grouped = location_grouped.mean().dropna()
mean_friends = mean_location_grouped.sort('Friends')['Friends']
friendsBar = vincent.Bar(mean_friends)
friendsBar.axis_titles(x='Country', y='Friends')
for axis in friendsBar.axes:
    axis.properties = AxisProperties()
    for prop in ['ticks', 'axis', 'major_ticks', 'minor_ticks']:
        setattr(axis.properties, prop, PropertySet(stroke=ValueRef(value=gpLightGray)))
    axis.properties.title = PropertySet(font_size=ValueRef(value=20), 
                                        fill=ValueRef(value=gpLightGray))
    axis.properties.labels = PropertySet(fill=ValueRef(value=gpLightGray))
friendsBar.axes[0].properties.labels.angle = ValueRef(value=0)
friendsBar.axes[0].properties.labels.align = ValueRef(value='center')
friendsBar.axes[0].properties.title.dy = ValueRef(value=20)
friendsBar.scales[2].range = [gpDarkBlue]
friendsBar.to_json('../charts/friendsBar.json')
friendsBar


Out[40]:

In [41]:
location_grouped = twitterData.groupby('UNGPLocation')
mean_location_grouped = location_grouped.mean().dropna()
mean_genderProb = mean_location_grouped.sort('UNGPGenderProb')['UNGPGenderProb']
genderProb = vincent.Bar(mean_genderProb)
genderProb.axis_titles(x='Country', y='Average Gender Probablility')
for axis in genderProb.axes:
    axis.properties = AxisProperties()
    for prop in ['ticks', 'axis', 'major_ticks', 'minor_ticks']:
        setattr(axis.properties, prop, PropertySet(stroke=ValueRef(value=gpLightGray)))
    axis.properties.title = PropertySet(font_size=ValueRef(value=20), 
                                        fill=ValueRef(value=gpLightGray))
    axis.properties.labels = PropertySet(fill=ValueRef(value=gpLightGray))
genderProb.axes[0].properties.labels.angle = ValueRef(value=0)
genderProb.axes[0].properties.labels.align = ValueRef(value='center')
genderProb.axes[0].properties.title.dy = ValueRef(value=20)
genderProb.scales[2].range = [gpRed]
genderProb.to_json('../charts/genderProbBar.json')
genderProb


Out[41]:

In [42]:
mpld3.enable_notebook()
gatesCountry = twitterData.UNGPLocation.value_counts(normalize=False, sort=True, ascending=False, bins=None)
gatesCountryFig = gatesCountry.plot(kind='barh', color='#00aeef')
mpld3.display()


/usr/local/lib/python2.7/dist-packages/mpld3/mplexporter/exporter.py:82: UserWarning: Blended transforms not yet supported. Zoom behavior may not work as expected.
  warnings.warn("Blended transforms not yet supported. "
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-42-f2b9fce17639> in <module>()
      2 gatesCountry = twitterData.UNGPLocation.value_counts(normalize=False, sort=True, ascending=False, bins=None)
      3 gatesCountryFig = gatesCountry.plot(kind='barh', color='#00aeef')
----> 4 mpld3.display()

/usr/local/lib/python2.7/dist-packages/mpld3/_display.pyc in display(fig, closefig, local, **kwargs)
    288     if closefig:
    289         plt.close(fig)
--> 290     return HTML(fig_to_html(fig, **kwargs))
    291 
    292 

/usr/local/lib/python2.7/dist-packages/mpld3/_display.pyc in fig_to_html(fig, d3_url, mpld3_url, no_extras, template_type, figid, use_http, **kwargs)
    223 
    224     renderer = MPLD3Renderer()
--> 225     Exporter(renderer, close_mpl=False, **kwargs).run(fig)
    226 
    227     fig, figure_json, extra_css, extra_js = renderer.finished_figures[0]

/usr/local/lib/python2.7/dist-packages/mpld3/mplexporter/exporter.pyc in run(self, fig)
     47             import matplotlib.pyplot as plt
     48             plt.close(fig)
---> 49         self.crawl_fig(fig)
     50 
     51     @staticmethod

/usr/local/lib/python2.7/dist-packages/mpld3/mplexporter/exporter.pyc in crawl_fig(self, fig)
    114                                        props=utils.get_figure_properties(fig)):
    115             for ax in fig.axes:
--> 116                 self.crawl_ax(ax)
    117 
    118     def crawl_ax(self, ax):

/usr/local/lib/python2.7/dist-packages/mpld3/mplexporter/exporter.pyc in crawl_ax(self, ax)
    121                                      props=utils.get_axes_properties(ax)):
    122             for line in ax.lines:
--> 123                 self.draw_line(ax, line)
    124             for text in ax.texts:
    125                 self.draw_text(ax, text)

/usr/local/lib/python2.7/dist-packages/mpld3/mplexporter/exporter.pyc in draw_line(self, ax, line, force_trans)
    177         coordinates, data = self.process_transform(line.get_transform(),
    178                                                    ax, line.get_xydata(),
--> 179                                                    force_trans=force_trans)
    180         linestyle = utils.get_line_style(line)
    181         if linestyle['dasharray'] in ['None', 'none', None]:

/usr/local/lib/python2.7/dist-packages/mpld3/mplexporter/exporter.pyc in process_transform(transform, ax, data, return_trans, force_trans)
     94                                ("figure", ax.figure.transFigure),
     95                                ("display", transforms.IdentityTransform())]:
---> 96                 if transform.contains_branch(trans):
     97                     code, transform = (c, transform - trans)
     98                     break

AttributeError: 'BlendedGenericTransform' object has no attribute 'contains_branch'

In [70]:
import ggplot as gg
(ggplot(gg.aes(x='UNGPLocation'), data=twitterData)
+ gg.geom_bar() + gg.ggtitle("Gates Tweets")
+ gg.labs("Country", "Number of tweets"))


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-70-4d0126b5e38a> in <module>()
      2 (ggplot(gg.aes(x='UNGPLocation'), data=gatesData)
      3 + gg.geom_bar() + gg.ggtitle("Gates Tweets")
----> 4 + gg.labs("Country", "Number of tweets"))

TypeError: 'module' object is not callable

In [77]:
languagePlot = ggplot(aes(x='DataSiftLanguage'), data=twitterData) + geom_bar() + ggtitle("Language Distribution") + labs("Language", "Number of tweets")
languagePlot


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-77-44d726a8e92c> in <module>()
----> 1 languagePlot = ggplot(aes(x='DataSiftLanguage'), data=gatesData) + geom_bar() + ggtitle("Language Distribution") + labs("Language", "Number of tweets")
      2 languagePlot

NameError: name 'aes' is not defined

In [1]:
from IPython.core.display import HTML
styles = open("../css/custom.css", "r").read()
HTML(styles)


Out[1]: